Kernel Author:
Bhishan Poudel, Ph.D Contd. Astrophysics

Date    : Jan 31, 2020
Topic    : ngals5k z1.5

Introduction

Update

  1. Looked at gm0 vs gc0 (and gm1 vs gc1) 45 degree line and removed outliers.
  2. Find the weights for g_sq for given magnitude bins using smooth fitting curve.

Usual Filtering

df = df.query('calib_psfCandidate == 0.0')
df = df.query('deblend_nChild == 0.0')
df['ellip'] = np.hypot( df['ext_shapeHSM_HsmShapeRegauss_e1'] ,
                        df['ext_shapeHSM_HsmShapeRegauss_e2'] )
df = df.query('ellip < 2.0') # it was 1.5 before

#select only few columns after filtering:
cols_select = ['base_SdssCentroid_x', 'base_SdssCentroid_y',
                'base_SdssCentroid_xSigma','base_SdssCentroid_ySigma',
                'ext_shapeHSM_HsmShapeRegauss_e1','ext_shapeHSM_HsmShapeRegauss_e2',
                'base_SdssShape_flux']
df = df[cols_select]        

# drop all nans
df = df.dropna()

# additional columns
df['radius'] =  df.eval(""" ( (ext_shapeHSM_HsmSourceMoments_xx *  ext_shapeHSM_HsmSourceMoments_yy) \
                                          -  (ext_shapeHSM_HsmSourceMoments_xy**2 ) )**0.25 """)

Shape filtering
https://github.com/LSSTDESC/DC2-analysis/blob/master/tutorials/object_gcr_2_lensing_cuts.ipynb

df = df.query('ext_shapeHSM_HsmShapeRegauss_resolution >= 0.3')
df = df.query('ext_shapeHSM_HsmShapeRegauss_sigma <= 0.4')
df = df.query('ext_shapeHSM_HsmShapeRegauss_flag== 0.0')

Filter strongly lensed objects

  • Take the objects with centroids >154 pixels (remove strong lens objects).
    # exclude strong lens objects <=154 distance
    # The shape of lsst.fits file is 3998,3998 and center is 1699,1699.
    df['x_center'] = 1699
    df['y_center'] = 1699
    df['distance'] = ( (df['x[0]'] - df['x_center'])**2 + (df['x[1]'] - df['y_center'])**2 )**0.5
    df = df[df.distance > 154]
    

Imcat script

# create new columns and cleaning (four files)
lc -C -n fN -n id -N '1 2 x' -N '1 2 errx' -N '1 2 g' -n ellip -n flux -n radius < "${M9T}".txt  |  lc +all 'mag = %flux log10 -2.5 *'  |  cleancat 15  |  lc +all -r 'mag' > "${M9C}".cat


# merge 4 catalogs
mergecats 5 "${MC}".cat "${M9C}".cat "${LC}".cat "${L9C}".cat > ${catalogs}/merge.cat &&


lc -b +all 
'x = %x[0][0] %x[1][0] + %x[2][0] + %x[3][0] + 4 / %x[0][1] %x[1][1] + %x[2][1] + %x[3][1] + 4 / 2 vector'
'gm = %g[0][0] %g[1][0] + 2 / %g[0][1] %g[1][1] + 2 / 2 vector' 
'gc = %g[2][0] %g[3][0] + 2 / %g[2][1] %g[3][1] + 2 / 2 vector'   
'gmd = %g[0][0] %g[1][0] - 2 / %g[0][1] %g[1][1] - 2 / 2 vector' 
'gcd = %g[2][0] %g[3][0] - 2 / %g[2][1] %g[3][1] - 2 / 2 vector' 
< ${catalogs}/merge.cat > ${final}/final_${i}.cat

Notes

final_text.txt is created by imcat program after merging four lsst files (m,m9,l,l9) after cleaning.

Imports

In [30]:
import json, os,sys
import numpy as np
import pandas as pd
import seaborn as sns
import time
sns.set(color_codes=True)

import plotly
import ipywidgets

pd.set_option('display.max_columns',200)
time_start_notebook = time.time()

import matplotlib.pyplot as plt
plt.style.use('ggplot')
%matplotlib inline

print([(x.__name__, x.__version__) for x in [np,pd,sns,plotly,ipywidgets]])
[('numpy', '1.18.4'), ('pandas', '1.0.3'), ('seaborn', '0.10.1'), ('plotly', '4.8.0'), ('ipywidgets', '7.5.1')]
In [31]:
%%javascript
IPython.OutputArea.auto_scroll_threshold = 9999;

Load the final text cleancat15 data

g_sq = g00 g00 + g10 g10
gmd_sq = gmd0**2 + gmd1**2
In [32]:
!head -2 ../data/cleancat/final_text_cleancat15_ngals5k_z0.5_125_280.txt
#       fN[0][0]       fN[1][0]       fN[2][0]       fN[3][0]       id[0][0]       id[1][0]       id[2][0]       id[3][0]           x[0]           x[1]     errx[0][0]     errx[0][1]     errx[1][0]     errx[1][1]     errx[2][0]     errx[2][1]     errx[3][0]     errx[3][1]        g[0][0]        g[0][1]        g[1][0]        g[1][1]        g[2][0]        g[2][1]        g[3][0]        g[3][1]    ellip[0][0]    ellip[1][0]    ellip[2][0]    ellip[3][0]     flux[0][0]     flux[1][0]     flux[2][0]     flux[3][0]   radius[0][0]   radius[1][0]   radius[2][0]   radius[3][0]      mag[0][0]      mag[1][0]      mag[2][0]      mag[3][0]          gm[0]          gm[1]          gc[0]          gc[1]         gmd[0]         gmd[1]         gcd[0]         gcd[1]
             125            125            125            125            673            669            679            665       2812.747      1504.8803         0.0243         0.0167          0.016         0.0271         0.0243         0.0167         0.0161         0.0271         1.0389           0.33        -1.0468        -0.2552         0.9883         0.3218        -1.0469        -0.2572      1.0900519      1.0774587       1.039371      1.0780313      37423.424      37664.673      37345.853      37586.541      4.1578392      4.2172688      4.1575487      4.2169835     -11.432859     -11.439836     -11.430606     -11.437581       -0.00395         0.0374        -0.0293         0.0323        1.04285         0.2926         1.0176         0.2895
In [33]:
names = "fN[0][0]       fN[1][0]       fN[2][0]       fN[3][0]       id[0][0]       id[1][0]       id[2][0]       id[3][0]           x[0]           x[1]     errx[0][0]     errx[0][1]     errx[1][0]     errx[1][1]     errx[2][0]     errx[2][1]     errx[3][0]     errx[3][1]        g[0][0]        g[0][1]        g[1][0]        g[1][1]        g[2][0]        g[2][1]        g[3][0]        g[3][1]    ellip[0][0]    ellip[1][0]    ellip[2][0]    ellip[3][0]     flux[0][0]     flux[1][0]     flux[2][0]     flux[3][0]   radius[0][0]   radius[1][0]   radius[2][0]   radius[3][0]      mag[0][0]      mag[1][0]      mag[2][0]      mag[3][0]          gm[0]          gm[1]          gc[0]          gc[1]         gmd[0]         gmd[1]         gcd[0]         gcd[1]"
print(names)
fN[0][0]       fN[1][0]       fN[2][0]       fN[3][0]       id[0][0]       id[1][0]       id[2][0]       id[3][0]           x[0]           x[1]     errx[0][0]     errx[0][1]     errx[1][0]     errx[1][1]     errx[2][0]     errx[2][1]     errx[3][0]     errx[3][1]        g[0][0]        g[0][1]        g[1][0]        g[1][1]        g[2][0]        g[2][1]        g[3][0]        g[3][1]    ellip[0][0]    ellip[1][0]    ellip[2][0]    ellip[3][0]     flux[0][0]     flux[1][0]     flux[2][0]     flux[3][0]   radius[0][0]   radius[1][0]   radius[2][0]   radius[3][0]      mag[0][0]      mag[1][0]      mag[2][0]      mag[3][0]          gm[0]          gm[1]          gc[0]          gc[1]         gmd[0]         gmd[1]         gcd[0]         gcd[1]
In [34]:
names = ['fN[0][0]','fN[1][0]','fN[2][0]','fN[3][0]',
 'id[0][0]','id[1][0]','id[2][0]','id[3][0]',
 'x[0]','x[1]',
 'errx[0][0]','errx[0][1]','errx[1][0]','errx[1][1]','errx[2][0]',
 'errx[2][1]','errx[3][0]','errx[3][1]',
 'g[0][0]','g[0][1]','g[1][0]','g[1][1]','g[2][0]','g[2][1]','g[3][0]','g[3][1]',
 'ellip[0][0]','ellip[1][0]','ellip[2][0]','ellip[3][0]',
 'flux[0][0]','flux[1][0]','flux[2][0]','flux[3][0]',
 'radius[0][0]','radius[1][0]','radius[2][0]','radius[3][0]',
 'mag[0][0]','mag[1][0]','mag[2][0]','mag[3][0]',
 'gm[0]','gm[1]','gc[0]', 'gc[1]',
 'gmd[0]','gmd[1]','gcd[0]','gcd[1]']


file_path = '../data/cleancat/final_text_cleancat15_ngals5k_z0.5_125_280.txt'


df = pd.read_csv(file_path,comment='#',engine='python',sep=r'\s\s+',
                 header=None,names=names)

print(df.shape)

# new columns
# df['g_sq'] = df['g[0][0]'] **2 + df['g[1][0]']**2 # only for imcat 00 and 10
# df['gmd_sq'] = df['gmd[0]'] **2 + df['gmd[1]']**2

df['g_sq'] = df['g[0][0]'] **2 + df['g[0][1]']**2
df['gmd_sq'] = df['gmd[0]'] **2 + df['gmd[1]']**2

df['gm_sq'] = df['gm[0]']**2 + df['gm[1]']**2
df['gc_sq'] = df['gc[0]']**2 + df['gc[1]']**2

df['mag_mono'] = (df['mag[0][0]'] + df['mag[1][0]'] ) / 2
df['mag_chro'] = (df['mag[2][0]'] + df['mag[3][0]'] ) / 2

df.head()
(35461, 50)
Out[34]:
fN[0][0] fN[1][0] fN[2][0] fN[3][0] id[0][0] id[1][0] id[2][0] id[3][0] x[0] x[1] errx[0][0] errx[0][1] errx[1][0] errx[1][1] errx[2][0] errx[2][1] errx[3][0] errx[3][1] g[0][0] g[0][1] g[1][0] g[1][1] g[2][0] g[2][1] g[3][0] g[3][1] ellip[0][0] ellip[1][0] ellip[2][0] ellip[3][0] flux[0][0] flux[1][0] flux[2][0] flux[3][0] radius[0][0] radius[1][0] radius[2][0] radius[3][0] mag[0][0] mag[1][0] mag[2][0] mag[3][0] gm[0] gm[1] gc[0] gc[1] gmd[0] gmd[1] gcd[0] gcd[1] g_sq gmd_sq gm_sq gc_sq mag_mono mag_chro
0 125 125 125 125 673 669 679 665 2812.74700 1504.88030 0.0243 0.0167 0.0160 0.0271 0.0243 0.0167 0.0161 0.0271 1.0389 0.3300 -1.0468 -0.2552 0.9883 0.3218 -1.0469 -0.2572 1.090052 1.077459 1.039371 1.078031 37423.4240 37664.6730 37345.8530 37586.5410 4.157839 4.217269 4.157549 4.216983 -11.432859 -11.439836 -11.430606 -11.437581 -0.00395 0.03740 -0.02930 0.03230 1.04285 0.29260 1.01760 0.28950 1.188213 1.173151 0.001414 0.001902 -11.436348 -11.434093
1 125 125 125 125 3624 3617 3627 3615 1476.06470 2177.74190 0.0575 0.0381 0.0362 0.0575 0.0573 0.0381 0.0362 0.0580 1.0092 0.2622 -1.0400 -0.1400 0.9842 0.2581 -1.0684 -0.1422 1.042705 1.049381 1.017480 1.077822 16508.8310 16472.9350 16469.9190 16437.5730 4.225692 4.127967 4.224107 4.127593 -10.544291 -10.541927 -10.541729 -10.539594 -0.01540 0.06110 -0.04210 0.05795 1.02460 0.20110 1.02630 0.20015 1.087233 1.090246 0.003970 0.005131 -10.543109 -10.540662
2 125 125 125 125 4006 3988 3998 3986 330.59460 2532.93750 0.0737 0.0814 0.0701 0.0828 0.0731 0.0812 0.0699 0.0830 -0.1782 0.8218 -0.3298 0.9130 -0.1521 0.8391 -0.3764 0.9629 0.840899 0.970740 0.852774 1.033854 6819.5376 6803.3047 6798.0851 6809.5060 4.200198 4.138292 4.185054 4.139294 -9.584387 -9.581800 -9.580966 -9.582789 -0.25400 0.86740 -0.26425 0.90100 0.07580 -0.04560 0.11215 -0.06190 0.707110 0.007825 0.816899 0.881629 -9.583094 -9.581878
3 125 125 125 125 3765 3756 3760 3749 2062.74510 2337.12310 0.0578 0.0389 0.0566 0.0406 0.0576 0.0388 0.0567 0.0406 0.7578 -0.7258 0.6626 -0.8152 0.7534 -0.7081 0.6832 -0.8223 1.049308 1.050519 1.033933 1.069083 22224.8360 21847.7210 22168.7250 21830.3450 4.870533 4.787105 4.854372 4.789153 -10.867096 -10.848515 -10.864352 -10.847651 0.71020 -0.77050 0.71830 -0.76520 0.04760 0.04470 0.03510 0.05710 1.101046 0.004264 1.098054 1.101486 -10.857805 -10.856001
4 125 125 125 125 2221 2338 2229 2337 835.16188 811.38905 0.2591 0.2448 0.2588 0.2426 0.2692 0.2516 0.2718 0.2589 0.3038 -0.8074 0.2382 -0.8645 0.3133 -0.6102 0.1663 -0.8017 0.862664 0.896716 0.685931 0.818766 6034.2829 6037.6519 6177.2667 6151.1167 4.543778 4.522524 4.822348 4.762993 -9.451564 -9.452170 -9.476991 -9.472385 0.27100 -0.83595 0.23980 -0.70595 0.03280 0.02855 0.07350 0.09575 0.744189 0.001891 0.772253 0.555869 -9.451867 -9.474688

Plot g-squared for monochromatic and chromatic files

In [35]:
if not os.path.isdir('images'):
    os.makedirs('images')
In [36]:
fig,ax = plt.subplots(1,2,figsize=(14,6))

df['gm_sq'].plot.hist(bins=50,ax=ax[0],color='b',label='mono')
ax[0].set_xlabel('gm_sq')
ax[0].legend()
ax[0].set_title('g-squared histogram for mono')

# gcsq
df['gc_sq'].plot.hist(bins=50,ax=ax[1],color='r',label='chro')
ax[1].set_xlabel('gc_sq')
ax[1].legend()
ax[1].set_title('g-squared histogram for chro')

# savefig
plt.savefig('images/gmsq_gcsq_hist.png')
In [37]:
plt.figure(figsize=(12,8))
sns.distplot(df['g_sq'],label='g_sq')
sns.distplot(df['gmd_sq'],label='gmd_sq')

plt.legend()
Out[37]:
<matplotlib.legend.Legend at 0x7fe5e6982ad0>

Contour Plots for Diagonal cuts

In [38]:
import plotly
import plotly.graph_objs as go
import plotly.figure_factory as ff
import plotly.tools as tls
from plotly.offline import plot, iplot, init_notebook_mode
init_notebook_mode(connected=False)
In [39]:
def matrix_of_number_density_from_two_cols(df,xcol,ycol,N):
    """Create grid of number density of two columns
    
    - Find the absolute max from two columns.
    - Create N bins -absMax to +absMax.
    - Return a matrix of N*N shape.
    """
    from itertools import product
    
    # derived variables
    xlabel = xcol
    ylabel = ycol
    xlabel1 = xlabel + '_cat_labels'
    ylabel1 = ylabel + '_cat_labels'
    
    xlabel2 = xlabel + '_cat'
    ylabel2 = ylabel + '_cat'
    colname = 'cat_freq'
    
    # take only xlabel and ylabel columns
    dx = df[[xlabel, ylabel]].copy()
    
    # get absolute maximum from two columns
    tolerance = 0.0000001
    max_abs_xcol_ycol = df[[xcol,ycol]].describe().iloc[[3,-1],:].abs().max().max()
    
    # create 1d array with N+1 values to create N bins
#     bins = np.linspace(-max_abs_xcol_ycol-tolerance, max_abs_xcol_ycol+tolerance,N+1)
    bins = np.linspace(0, max_abs_xcol_ycol,N+1)

    # create N bins
    dx[xlabel1] = pd.cut(dx[xlabel], bins, labels=np.arange(N))
    dx[ylabel1] = pd.cut(dx[ylabel], bins, labels=np.arange(N))

    # count number of points in each bin
    dx[colname] = dx.groupby([xlabel1,ylabel1])[xlabel1].transform('count')

    # drop duplicates
    dx1 = dx.drop_duplicates(subset=[xlabel1,ylabel1])[[xlabel1,ylabel1,colname]]

    # use permutation to get the grid of N * N
    perms = list(product(range(N), range(N)))
    x = [i[0] for i in perms]
    y = [i[1] for i in perms]
    dx2 = pd.DataFrame({xlabel1: x, ylabel1: y, colname:0})

    # update dx2 to merge frequency values
    dx2.update(dx2.drop(colname,1).merge(dx1,how='left'))
    dx2 = dx2.astype(int)

    # z values to plot heatmap
    z = dx2[colname].values.reshape(N,N)
    z = z.T

    return z

Transform and scale the data

In [40]:
def transform_scale(z,transform='linear',scale=None):
    """Transform and scale given 1d numpy array.
    
    transform: linear, log, sqrt, sinh, arcsinh
    scale    : minmax, zscale
    
    """
    #==================================
    # linear, log, sqrt, arcsinh, sinh 
    #
    # we need linear tranform option to compare.
    if transform == 'linear':
        z = z

    if transform == 'log':
        z = np.log1p(z)
        
    if transform == 'sqrt':
        z = np.sqrt(z)

    if transform == 'sinh':
        z = np.sinh(z)
        
    if transform == 'arcsinh':
        z = np.arcsinh(z)
    
    #===============================
    # scaling minmax and zscale
    if scale== 'minmax':
        z = z / (z.max()-z.min())
    if scale == 'zscale':
        z = (z-z.mean()) / z.std()
    if scale is None:
        z = z
        
    return z

plot the countours

In [41]:
def plot_contour(Z,colorscale,x1y1x2y2=None,
                 title='Contour plot',
                 xlabel='',
                 ylabel=''):
    """Plot the contour plot.

    Contour plot from given 2d array.
    
    Example:
    -----------
    z  = matrix_of_number_density_from_two_cols(df,'gsq','gmdsq',N)
    z1 = transform_scale(z,transform=transform,scale=scale)

    plot_contour(z1, colorscale='Viridis', x1y1x2y2=[10,0,99,90],
            title=f'Contour plot: {transform}+{scale}',
            xlabel='Bin number of gsq',
            ylabel='Bin number of gmsq')
    
    """

    trace1 = go.Contour(z=Z, colorscale=colorscale)
    axis_layout = dict(
        showticklabels = True
    )
    
    xaxis = {**axis_layout, **{'title':f'{xlabel}'}}
    yaxis = {**axis_layout, **{'title':f'{ylabel}'}}

    layout = go.Layout(
        width=1000,
        height=1000,
        autosize=False,
        title=f'{title} ',
        xaxis = xaxis,
        yaxis = yaxis,
    )
    

    data = [trace1]
    
    if x1y1x2y2:

        # center line 
        p2x1, p2y1 = 0,0
        p2x2, p2y2 = 99,99
        sc1 = go.Scatter(x=[p2x1,p2x2],
                         y=[p2y1,p2y2],
                         mode = 'lines+markers',
                         name = f'line ({p2x1},{p2y1}) to ({p2x2},{p2y2})')
        
        sc2 = go.Scatter(x=[x1y1x2y2[0],x1y1x2y2[2]],
                         y=[x1y1x2y2[1],x1y1x2y2[3]],
                         mode = 'lines+markers',
                         name = f'line ({x1y1x2y2[0]},{x1y1x2y2[1]}) \
                         to ({x1y1x2y2[2]},{x1y1x2y2[3]})')


        data = [trace1, sc1, sc2]

    fig = go.Figure( data=data, layout=layout )
    
    # update figure
    fig.update_layout(
    xaxis = dict(tickmode = 'linear',dtick = 5),
    yaxis = dict(tickmode = 'linear',dtick = 5),
    )

    iplot(fig)
    return None

N = 100
transform='log' # linear log sqrt sinh
scale='minmax'


xcol,ycol = 'g_sq', 'gmd_sq'
z  = matrix_of_number_density_from_two_cols(df,xcol,ycol,N)
z1 = transform_scale(z,transform=transform,scale=scale)

plot_contour(z1, colorscale='Viridis', x1y1x2y2=[10,0,99,90],
            title=f'Contour plot for transform =  {transform} and scaling = {scale}',
            xlabel=f'Bin number of {xcol}',
            ylabel=f'Bin number of {ycol}')

Grid of N*N from g_sq and gmd_sq

In [42]:
N = 100
xcol = 'g_sq'
ycol = 'gmd_sq'
max_abs_xcol_ycol = df[[xcol,ycol]].abs().max().max()

print(max_abs_xcol_ycol)

df[df[xcol]==max_abs_xcol_ycol]
3.91756097
Out[42]:
fN[0][0] fN[1][0] fN[2][0] fN[3][0] id[0][0] id[1][0] id[2][0] id[3][0] x[0] x[1] errx[0][0] errx[0][1] errx[1][0] errx[1][1] errx[2][0] errx[2][1] errx[3][0] errx[3][1] g[0][0] g[0][1] g[1][0] g[1][1] g[2][0] g[2][1] g[3][0] g[3][1] ellip[0][0] ellip[1][0] ellip[2][0] ellip[3][0] flux[0][0] flux[1][0] flux[2][0] flux[3][0] radius[0][0] radius[1][0] radius[2][0] radius[3][0] mag[0][0] mag[1][0] mag[2][0] mag[3][0] gm[0] gm[1] gc[0] gc[1] gmd[0] gmd[1] gcd[0] gcd[1] g_sq gmd_sq gm_sq gc_sq mag_mono mag_chro
25431 233 233 233 233 3779 3778 3700 3695 1603.9956 2291.9362 0.0266 0.0263 0.025 0.0311 0.0342 0.0275 0.0259 0.0309 -0.3001 1.9564 -0.8341 0.8838 -0.1789 1.2475 -0.6942 0.9649 1.979283 1.215247 1.260263 1.188674 16521.265 17662.844 19831.667 19038.667 4.253503 4.388925 4.988621 4.654104 -10.545108 -10.617652 -10.743398 -10.699091 -0.5671 1.4201 -0.43655 1.1062 0.267 0.5363 0.25765 0.1413 3.917561 0.358907 2.338286 1.414254 -10.58138 -10.721245
In [43]:
# create 1d array with N+1 values to create N bins
bins = np.linspace(0, max_abs_xcol_ycol,N+1)

# bins dict
bins_dict = {i:v for i,v in enumerate(bins)}

# create N bins
ser_ycol_bins = pd.cut(df[ycol], bins=bins,)

df['g_sq_bins'] = ser_ycol_bins
df[['g_sq','g_sq_bins']].head()
Out[43]:
g_sq g_sq_bins
0 1.188213 (1.136, 1.175]
1 1.087233 (1.058, 1.097]
2 0.707110 (0.0, 0.0392]
3 1.101046 (0.0, 0.0392]
4 0.744189 (0.0, 0.0392]
In [44]:
# bin 0  ==> 0          to 0.03994369
# bin 1  ==> 0.03994369 to 0.07988739
# bin 10 ==> 0.39943694 to 0.43938063

bins[10], bins[11]
Out[44]:
(0.39175609699999997, 0.4309317067)

Analysis of Points above gmd_sq bin number 10

What is the corresponding gmsq value to y-axis bin number 10 (11th bin)?

The 100*100 bin is created from absMax of g_sq and gmd_sq. Then the line 0 to absMax is divided into 100 parts and bins are created.

bin 10 for gmd_sq is 0.39943694 to 0.43938063

In [45]:
# take all points where gmd_sq > 10th bin

df_gmd_sq_lt_bin10 = df[df.gmd_sq > bins[10]]

print(df_gmd_sq_lt_bin10.shape)

df_gmd_sq_lt_bin10.head()
(12573, 57)
Out[45]:
fN[0][0] fN[1][0] fN[2][0] fN[3][0] id[0][0] id[1][0] id[2][0] id[3][0] x[0] x[1] errx[0][0] errx[0][1] errx[1][0] errx[1][1] errx[2][0] errx[2][1] errx[3][0] errx[3][1] g[0][0] g[0][1] g[1][0] g[1][1] g[2][0] g[2][1] g[3][0] g[3][1] ellip[0][0] ellip[1][0] ellip[2][0] ellip[3][0] flux[0][0] flux[1][0] flux[2][0] flux[3][0] radius[0][0] radius[1][0] radius[2][0] radius[3][0] mag[0][0] mag[1][0] mag[2][0] mag[3][0] gm[0] gm[1] gc[0] gc[1] gmd[0] gmd[1] gcd[0] gcd[1] g_sq gmd_sq gm_sq gc_sq mag_mono mag_chro g_sq_bins
0 125 125 125 125 673 669 679 665 2812.74700 1504.8803 0.0243 0.0167 0.0160 0.0271 0.0243 0.0167 0.0161 0.0271 1.0389 0.3300 -1.0468 -0.2552 0.9883 0.3218 -1.0469 -0.2572 1.090052 1.077459 1.039371 1.078031 37423.4240 37664.6730 37345.8530 37586.5410 4.157839 4.217269 4.157549 4.216983 -11.432859 -11.439836 -11.430606 -11.437581 -0.00395 0.03740 -0.02930 0.03230 1.04285 0.29260 1.01760 0.28950 1.188213 1.173151 0.001414 0.001902 -11.436348 -11.434093 (1.136, 1.175]
1 125 125 125 125 3624 3617 3627 3615 1476.06470 2177.7419 0.0575 0.0381 0.0362 0.0575 0.0573 0.0381 0.0362 0.0580 1.0092 0.2622 -1.0400 -0.1400 0.9842 0.2581 -1.0684 -0.1422 1.042705 1.049381 1.017480 1.077822 16508.8310 16472.9350 16469.9190 16437.5730 4.225692 4.127967 4.224107 4.127593 -10.544291 -10.541927 -10.541729 -10.539594 -0.01540 0.06110 -0.04210 0.05795 1.02460 0.20110 1.02630 0.20015 1.087233 1.090246 0.003970 0.005131 -10.543109 -10.540662 (1.058, 1.097]
9 125 125 125 125 1107 1107 1120 1110 231.78355 2476.4725 0.1350 0.0985 0.0934 0.1466 0.1341 0.0994 0.0942 0.1476 1.0104 0.2082 -1.0733 -0.0683 1.0468 0.2275 -1.1408 -0.0708 1.031627 1.075471 1.071236 1.142995 6155.8654 6185.8754 6138.1610 6168.6647 4.012843 4.027674 4.010899 4.025892 -9.473223 -9.478503 -9.470096 -9.475478 -0.03145 0.06995 -0.04700 0.07835 1.04185 0.13825 1.09380 0.14915 1.064255 1.104564 0.005882 0.008348 -9.475863 -9.472787 (1.097, 1.136]
11 125 125 125 125 3530 3472 3535 3483 1206.80990 2109.9034 0.0120 0.0079 0.0074 0.0119 0.0120 0.0080 0.0074 0.0120 0.7728 -0.5090 -0.7244 0.6217 0.7657 -0.5007 -0.7310 0.6325 0.925365 0.954603 0.914875 0.966653 113953.3100 113060.1300 113725.0100 112838.1800 5.003308 4.959239 5.003152 4.959238 -12.641817 -12.633274 -12.639640 -12.631140 0.02420 0.05635 0.01735 0.06590 0.74860 -0.56535 0.74835 -0.56660 0.856301 0.880023 0.003761 0.004644 -12.637545 -12.635390 (0.862, 0.901]
14 125 125 125 125 2746 2735 2754 2733 1377.81840 1231.8038 0.0861 0.1384 0.1375 0.0916 0.0858 0.1377 0.1378 0.0921 -1.0946 -0.1038 1.0728 -0.0672 -1.0392 -0.0965 1.1074 -0.0763 1.099511 1.074903 1.043671 1.110025 6867.7531 6904.5410 6863.4534 6891.0744 4.109582 4.190900 4.112447 4.190650 -9.592037 -9.597837 -9.591357 -9.595717 -0.01090 -0.08550 0.03410 -0.08640 -1.08370 -0.01830 -1.07330 -0.01010 1.208924 1.174741 0.007429 0.008628 -9.594937 -9.593537 (1.136, 1.175]
In [46]:
# fN[0][0]  ==> lsst_mono_z1.5_000.fits
# fN[1][0]  ==> lsst_mono90_z1.5_000.fits
#
# id[0][0]  ==> id of given object for mono file number 0
In [47]:
# take only first file number 0 (it has m,m9,c and c9)

df_gmd_sq_lt_bin10_file0 = df_gmd_sq_lt_bin10[df_gmd_sq_lt_bin10['fN[0][0]'] == 0]

# add radius for mono
df_gmd_sq_lt_bin10_file0['radius_mono'] = \
(df_gmd_sq_lt_bin10_file0['radius[0][0]'] + 
 df_gmd_sq_lt_bin10_file0['radius[1][0]'] ) /2.0

print(df_gmd_sq_lt_bin10_file0.shape)

df_gmd_sq_lt_bin10_file0.head()
(0, 58)
Out[47]:
fN[0][0] fN[1][0] fN[2][0] fN[3][0] id[0][0] id[1][0] id[2][0] id[3][0] x[0] x[1] errx[0][0] errx[0][1] errx[1][0] errx[1][1] errx[2][0] errx[2][1] errx[3][0] errx[3][1] g[0][0] g[0][1] g[1][0] g[1][1] g[2][0] g[2][1] g[3][0] g[3][1] ellip[0][0] ellip[1][0] ellip[2][0] ellip[3][0] flux[0][0] flux[1][0] flux[2][0] flux[3][0] radius[0][0] radius[1][0] radius[2][0] radius[3][0] mag[0][0] mag[1][0] mag[2][0] mag[3][0] gm[0] gm[1] gc[0] gc[1] gmd[0] gmd[1] gcd[0] gcd[1] g_sq gmd_sq gm_sq gc_sq mag_mono mag_chro g_sq_bins radius_mono

Regions above and below for gsq vs gmdsq contour plot

For example, take points:
Lower line below the diagonal line
point on x-axis: x1,y1 = 10,0
point on y-axis: x2,y2 = 99,90

here 20,0,99,80 are bin number, their values are obtained from bins_dict
x1,y1 = bins_dict[10], bins_dict[0]
x2,y2 = bins_dict[99], bins_dict[90]


Equation of straight line:
y-y1 = y2-y1 * (x-x1)
       -----
       x2-x1

boundary: (x2-x1) * (y-y1) - (y2-y1) * (x-x1)
In [48]:
N = 100
bins = np.linspace(0, max_abs_xcol_ycol,N+1)
bins_dict = {i:v for i,v in enumerate(bins)}

# points from contour plot
x1y1x2y2 = [10,0,99,90]
print(x1y1x2y2)

# actual values of x1, y1, x2, and y2
x1,y1 = bins_dict[x1y1x2y2[0]], bins_dict[x1y1x2y2[1]]
x2,y2 = bins_dict[x1y1x2y2[2]], bins_dict[x1y1x2y2[3]]

df['above'] = df.eval(" ( (@x2-@x1) * gmd_sq )  >= ( (@y2-@y1) * (g_sq - @x1 )) ")

print(df['above'].value_counts())
print()
print(df['above'].value_counts(normalize=True))
[10, 0, 99, 90]
True     23205
False    12256
Name: above, dtype: int64

True     0.654381
False    0.345619
Name: above, dtype: float64
In [49]:
df[df.above==True]['gm_sq'].plot.hist(figsize=(12,8),bins=60,color='b',label='Above')

nabove = len(df[df.above==True])
nbelow = len(df[df.above==False])

above_pct = nabove/(nabove+nbelow)*100
below_pct = 100-above_pct
text = f'Above = {nabove:,} ({above_pct:,.0f}%)\nBelow = {nbelow:,} ( {below_pct:,.0f}%)'

plt.text(0.5,10_000,text,fontsize=14)
plt.legend()

plt.xlabel('gm_sq')
plt.title('gm_sq histograms for above and below cases', fontsize=20)

df[df.above==False]['gm_sq'].plot.hist(figsize=(12,8),bins=60,color='r',alpha=0.5,label='Below')
plt.legend()
plt.xlabel('gm_sq')
plt.title('gm_sq histograms below the boundary', fontsize=20)
Out[49]:
Text(0.5, 1.0, 'gm_sq histograms below the boundary')
In [50]:
df[df.above==True]['gm_sq'].plot.hist(figsize=(12,8),bins=60,color='r',alpha=0.5)
plt.xlabel('gm_sq')
plt.title('gm_sq histogram', fontsize=20)
Out[50]:
Text(0.5, 1.0, 'gm_sq histogram')
In [51]:
df[df.above==False]['gm_sq'].plot.hist(figsize=(12,8),bins=60,color='r',alpha=0.5,label='Below')
plt.legend()
plt.xlabel('gm_sq')
plt.title('gm_sq histograms below the boundary', fontsize=20)
Out[51]:
Text(0.5, 1.0, 'gm_sq histograms below the boundary')

Now, Take only the data above the lower diagonal as cleaned data

In [52]:
rows_before = df.shape[0]
df = df[df.above==True]

print(f'Before rows: {rows_before}, After rows: {df.shape[0]}')
Before rows: 35461, After rows: 23205
In [53]:
plt.figure(figsize=(12,8))
sns.distplot(df['gm_sq'],label='gm_sq')
plt.legend()
plt.savefig('images/gmsq_histogram_after_cleaning.png',dpi=300)

gm vs gc Plots

Equation of straight line:
y-y1 = y2-y1 * (x-x1)
       -----
       x2-x1

boundary: (x2-x1) * (y-y1) - (y2-y1) * (x-x1)
In [54]:
fig,ax = plt.subplots(1,2,figsize=(16,8))
df.plot.scatter(x='gm[0]',y='gc[0]', ax=ax[0])
df.plot.scatter(x='gm[1]',y='gc[1]', ax=ax[1])

ax[0].set_xlim(-2.0,2.0)
ax[0].set_ylim(-2.0,2.0)

ax[1].set_xlim(-2.0,2.0)
ax[1].set_ylim(-2.0,2.0)

# 45 degree line
n=2.0
ax[0].plot([-n,n],[-n,n],'r--')
ax[1].plot([-n,n],[-n,n],'r--')

plt.suptitle('gm vs gc plot',weight='bold',fontsize=24);

Remove outliers based on gm vs gc plot

In [55]:
# Two scatter plots
#===========================================
fig,ax = plt.subplots(1,2,figsize=(16,8))
plt.suptitle('gm vs gc plot',weight='bold',fontsize=24);

df.plot.scatter(x='gm[0]',y='gc[0]', ax=ax[0])
df.plot.scatter(x='gm[1]',y='gc[1]', ax=ax[1])

ax[0].set_xlim(-2.0,2.0)
ax[0].set_ylim(-2.0,2.0)

ax[1].set_xlim(-2.0,2.0)
ax[1].set_ylim(-2.0,2.0)

# 45 degree line
n=2.0
ax[0].plot([-n,n],[-n,n],'r--')
ax[1].plot([-n,n],[-n,n],'r--')



# Left plot gm0 vs gc0 parallel lines
#===========================================
x = df['gm[0]'].to_numpy()
y = df['gc[0]'].to_numpy()

# Find distance for parallel lines
sigma = (x - y).std()
n = 10
d = n * sigma
c = d/np.sqrt(2) 
y1 = x + c
y2 = x - c

# left plot parallel lines
ax[0].plot(y1,x,'b-.',label=f'{n} sigma')
ax[0].plot(y2,x,'b-.')


# Right plot gm1 vs gc1 parallel lines
#===========================================
x = df['gm[1]'].to_numpy()
y = df['gc[1]'].to_numpy()

sigma = (x - y).std()
n = 10
d = n * sigma
c = d/np.sqrt(2) 
y1 = x + c
y2 = x - c

ax[1].plot(y1,x,'b-.', label=f'{n} sigma')
ax[1].plot(y2,x,'b-.')


#=============
# Add legends
# plt.tight_layout()
ax[0].legend(loc='upper left')
ax[1].legend(loc='upper left')

plt.show()
In [56]:
# Remove outliers for gc0 vs gm0
n = 10
sigma = (df['gm[0]'] - df['gc[0]']).std()
d = n * sigma
c = d/np.sqrt(2)

cond_upper = (df['gc[0]'] - df['gm[0]'] <= c)
cond_below = (df['gm[0]'] - df['gc[0]'] <= c)

df = df[ cond_upper & cond_below ]


# df.plot.scatter(x='gm[0]',y='gc[0]')
In [57]:
# Remove outliers for gc1 vs gm1
n = 10
sigma = (df['gm[1]'] - df['gc[1]']).std()
d = n * sigma
c = d/np.sqrt(2)

cond_upper = (df['gc[1]'] - df['gm[1]'] <= c)
cond_below = (df['gm[1]'] - df['gc[1]'] <= c)

df = df[ cond_upper & cond_below ]


# df.plot.scatter(x='gm[1]',y='gc[1]')
In [58]:
# Two scatter plots
#===========================================
fig,ax = plt.subplots(1,2,figsize=(16,8))
plt.suptitle('gm vs gc plot',weight='bold',fontsize=24);

df.plot.scatter(x='gm[0]',y='gc[0]', ax=ax[0])
df.plot.scatter(x='gm[1]',y='gc[1]', ax=ax[1])

ax[0].set_xlim(-2.0,2.0)
ax[0].set_ylim(-2.0,2.0)

ax[1].set_xlim(-2.0,2.0)
ax[1].set_ylim(-2.0,2.0)

# 45 degree line
n=2.0
ax[0].plot([-n,n],[-n,n],'r--')
ax[1].plot([-n,n],[-n,n],'r--')


# Left plot gm0 vs gc0 parallel lines
#===========================================
x = df['gm[0]'].to_numpy()
y = df['gc[0]'].to_numpy()

# Find distance for parallel lines
sigma = (x - y).std()
n = 10
d = n * sigma
c = d/np.sqrt(2) 
y1 = x + c
y2 = x - c

# left plot parallel lines
ax[0].plot(y1,x,'b-.',label=f'{n} sigma')
ax[0].plot(y2,x,'b-.')



# Right plot gm1 vs gc1 parallel lines
#===========================================
x = df['gm[1]'].to_numpy()
y = df['gc[1]'].to_numpy()

sigma = (x - y).std()
n = 10
d = n * sigma
c = d/np.sqrt(2) 
y1 = x + c
y2 = x - c

ax[1].plot(y1,x,'b-.', label=f'{n} sigma')
ax[1].plot(y2,x,'b-.')


#=============
# Add legends
# plt.tight_layout()
ax[0].legend(loc='upper left')
ax[1].legend(loc='upper left')

plt.show()

Plot magnidude for different bin numbers

In [59]:
df.filter(regex='mag').head(2)
Out[59]:
mag[0][0] mag[1][0] mag[2][0] mag[3][0] mag_mono mag_chro
0 -11.432859 -11.439836 -11.430606 -11.437581 -11.436348 -11.434093
1 -10.544291 -10.541927 -10.541729 -10.539594 -10.543109 -10.540662
In [60]:
df[['gm_sq','gc_sq']].describe()
Out[60]:
gm_sq gc_sq
count 2.309700e+04 2.309700e+04
mean 3.610357e-02 3.447451e-02
std 9.465974e-02 9.356339e-02
min 5.000000e-08 1.450000e-07
25% 1.262953e-03 1.222600e-03
50% 3.922842e-03 3.719472e-03
75% 1.780393e-02 1.650081e-02
max 1.142832e+00 1.124436e+00
In [61]:
def plot_bin_mag_mono_chro(nbins,show=False,ret=False):
    import os
    
    if not os.path.isdir('images'):
        os.makedirs('images')
    
    df['bins_mag_mono'] = pd.cut(df['mag_mono'],nbins)
    df['bins_mag_chro'] = pd.cut(df['mag_chro'],nbins)
    
    # show the text of bin counts
    text_mono = df.groupby('bins_mag_mono')['gm_sq'].agg(['count', 'mean'])
    text_mono = text_mono.reset_index().assign(pct_change = lambda x: x['mean'].pct_change())
    text_mono = text_mono.to_string().replace('mean','gm_sq')
    
    text_chro = df.groupby('bins_mag_chro')['gc_sq'].agg(['count', 'mean'])
    text_chro = text_chro.reset_index().assign(pct_change = lambda x: x['mean'].pct_change())
    text_chro = text_chro.to_string().replace('mean','gc_sq')
 
    # data to plot
    df_mono_gm_sq_mean = df.groupby('bins_mag_mono')['gm_sq'].mean()
    df_chro_gm_sq_mean = df.groupby('bins_mag_chro')['gc_sq'].mean()
    
    # plot
    fig,ax = plt.subplots(1,2,figsize=(12,8))
    
    # mono (plot the magnitude mean in each bins)
    df_mono_gm_sq_mean.plot(marker='o',ax=ax[0])
    ax[0].tick_params(axis='x', rotation=90)
    ax[0].set_ylabel('gm_sq_mean',fontsize=18)
    ax[0].set_xlabel('bin_mag_mono',fontsize=18)
    ax[0].set_title(f'gm_sq per magnitude bins with nbins = {nbins}')
    ax[0].text(0,0.5,text_mono,fontsize=14,va='center')
    ax[0].set_ylim(0,1)
    ax[0].set_yticks(np.arange(0, 1, step=0.1))
 
    # chro
    df_chro_gm_sq_mean.plot(marker='o',ax=ax[1])
    ax[1].tick_params(axis='x', rotation=90)
    ax[1].set_ylabel('gc_sq_mean',fontsize=18)
    ax[1].set_xlabel('bin_mag_chro',fontsize=18)
    ax[1].set_title(f'gc_sq per magnitude bins with nbins = {nbins}')
    ax[1].text(0,0.5,text_chro,fontsize=14,va='center')
    ax[1].set_ylim(0,1)
    ax[1].set_yticks(np.arange(0, 1, step=0.1))
    
    plt.tight_layout()
    plt.savefig(f'images/bin_mag_mono_chro_{nbins}.png')
    
    if show:
        plt.show()
    plt.close()
    
    if ret:
        return df_mono_gm_sq_mean, df_chro_gm_sq_mean

for nbins in range(5,15):
    plot_bin_mag_mono_chro(nbins,show=True)
In [62]:
"""
Note:
These plots were much different previously without doing some filterings:

https://nbviewer.jupyter.org/github/bpRsh/2019_shear_analysis_after_dmstack/blob/master/Jan_2020/a01_jan8/a01_cleancat15_gc0_gm0.ipynb

""";

Magnitude weight column for Monochromatic case

In [63]:
df['mag_mono'].plot.hist(bins=100)
Out[63]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fe5e6d80b10>
In [64]:
nbins = 9 # the bin number looks good in above gm_sq plots
df_mono, df_chro = plot_bin_mag_mono_chro(nbins,show=True,ret=True)
In [65]:
# from plot above I can see from Kth point graph goes up.
# graph is flat upto Kth point then increases linearly to top of curve.

num_start_increasing = 3 
df_mono.index[num_start_increasing]
Out[65]:
Interval(-12.38, -11.623, closed='right')
In [66]:
# how to choose the bottom of the slope mathematically?
# try to see pct change

df_mono.pct_change().to_frame('gm_sq_pct_change').style.background_gradient(
    cmap='RdBu')
Out[66]:
gm_sq_pct_change
bins_mag_mono
(-14.659, -13.895] nan
(-13.895, -13.137] -0.282301
(-13.137, -12.38] -0.149686
(-12.38, -11.623] 0.330761
(-11.623, -10.865] 0.990606
(-10.865, -10.108] 0.556754
(-10.108, -9.351] 0.023662
(-9.351, -8.593] -0.543277
(-8.593, -7.836] 0.204015
In [67]:
# look at smaller part than
df_mono_left = df_mono.pct_change()\
    .loc[df_mono.pct_change().index < df_mono.pct_change().idxmax()]

df_mono_left
Out[67]:
bins_mag_mono
(-14.659, -13.895]         NaN
(-13.895, -13.137]   -0.282301
(-13.137, -12.38]    -0.149686
(-12.38, -11.623]     0.330761
Name: gm_sq, dtype: float64
In [68]:
df_mono.idxmax().left, df_mono.idxmax().right
Out[68]:
(-10.108, -9.351)
In [69]:
# look at case when nbinsK = 9 and when the curve is going up
# index for when curve goes linearly up after initial flat
idx_bottom_of_slope_mono = [df_mono.index[num_start_increasing].left,
                       df_mono.index[num_start_increasing].right
                       ]

mag_low_nbinsK_mono = np.mean(idx_bottom_of_slope_mono)

# index for peak of curve
idx_peak_mono = [df_mono.idxmax().left,
            df_mono.idxmax().right
           ]
mag_high_nbinsK_mono = np.mean(idx_peak_mono)

idx_bottom_of_slope_mono, idx_peak_mono, mag_low_nbinsK_mono, mag_high_nbinsK_mono
Out[69]:
([-12.38, -11.623], [-10.108, -9.351], -12.0015, -9.729500000000002)
In [70]:
from scipy.optimize import curve_fit

xcol = 'mag_mono'
ycol = 'gm_sq'

x = df.query("""  @mag_low_nbinsK_mono < mag_mono <  @mag_high_nbinsK_mono  """)[xcol].to_numpy()
y = df.query("""  @mag_low_nbinsK_mono < mag_mono <  @mag_high_nbinsK_mono  """)[ycol].to_numpy()

def func(x, a, b):
    return a*x + b

params, _ = curve_fit(func, x, y)
[a, b] = params.round(2)

print(f'magnitude ranges for mono: {mag_low_nbinsK_mono}, {mag_high_nbinsK_mono}')
print(f'fitting params   for mono: {a}, {b}' )
magnitude ranges for mono: -12.0015, -9.729500000000002
fitting params   for mono: 0.02, 0.32
In [71]:
mag_low_nbinsK_mono
Out[71]:
-12.0015
In [72]:
def magnitude_weight_mono(mag):
    if mag < mag_low_nbinsK_mono:
        return 1/ (mag_low_nbinsK_mono *a + b)
    
    else:
        return 1/ (a*mag + b)

df['wt_mag_mono'] = df['mag_mono'].apply(magnitude_weight_mono)
df['wt_mag_mono'] = df['wt_mag_mono'] / df['wt_mag_mono'].mean() # normalize by mean

Magnitude weight column for Chromatic case

In [73]:
df['mag_chro'].plot.hist(bins=100)
Out[73]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fe5e9334990>
In [74]:
nbins = 9 # the bin number looks good in above gm_sq plots
df_mono, df_chro = plot_bin_mag_mono_chro(nbins,show=True,ret=True)
In [75]:
# from plot above I can see from Kth point graph goes up.
# graph is flat upto Kth point then increases linearly to top of curve.

num_start_increasing = 3 
df_chro.index[num_start_increasing]
Out[75]:
Interval(-12.382, -11.624, closed='right')
In [76]:
# look at case when nbinsK = 9 and when the curve is going up
# index for when curve goes linearly up after initial flat
idx_bottom_of_slope_chro = [df_chro.index[num_start_increasing].left,
                            df_chro.index[num_start_increasing].right
                            ]

mag_low_nbinsK_chro = np.mean(idx_bottom_of_slope_chro)

# index for peak of curve
idx_peak_chro = [df_chro.idxmax().left,
                 df_chro.idxmax().right
                 ]
mag_high_nbinsK_chro = np.mean(idx_peak_chro)

idx_bottom_of_slope_chro, idx_peak_chro, mag_low_nbinsK_chro, mag_high_nbinsK_chro
Out[76]:
([-12.382, -11.624], [-10.11, -9.353], -12.003, -9.7315)
In [77]:
from scipy.optimize import curve_fit


xcol = 'mag_chro'
ycol = 'gc_sq'

x = df.query("""  @mag_low_nbinsK_chro < mag_chro <  @mag_high_nbinsK_chro  """)[xcol].to_numpy()
y = df.query("""  @mag_low_nbinsK_chro < mag_chro <  @mag_high_nbinsK_chro  """)[ycol].to_numpy()

def func(x, a, b):
    return a*x + b

params, _ = curve_fit(func, x, y)
[a, b] = params.round(2)

print(f'magnitude ranges for chro: {mag_low_nbinsK_chro}, {mag_high_nbinsK_chro}')
print(f'fitting params   for chro: {a}, {b}' )
magnitude ranges for chro: -12.003, -9.7315
fitting params   for chro: 0.02, 0.32
In [78]:
mag_low_nbinsK_chro
Out[78]:
-12.003
In [79]:
def magnitude_weight_chro(mag):
    if mag < mag_low_nbinsK_chro:
        return 1/ (mag_low_nbinsK_chro*a + b)
    
    else:
        return 1/ (a*mag + b)

df['wt_mag_chro'] = df['mag_chro'].apply(magnitude_weight_chro)
df['wt_mag_chro'] = df['wt_mag_chro'] / df['wt_mag_chro'].mean() # normalize by mean

# mean
df['wt_mag']      = (df['wt_mag_mono'] + df['wt_mag_chro']) / 2

# df.drop(['wt_mag_chro','wt_mag_mono'],axis=1,inplace=True)

df.iloc[:2,-7:]
Out[79]:
g_sq_bins above bins_mag_mono bins_mag_chro wt_mag_mono wt_mag_chro wt_mag
0 (1.136, 1.175] True (-11.623, -10.865] (-11.624, -10.867] 1.093726 1.091908 1.092817
1 (1.058, 1.097] True (-10.865, -10.108] (-10.867, -10.11] 0.914694 0.913215 0.913955

Ellipticity Components Transformation

c2 = (dx * dx - dy * dy) / (r * r);
s2 = 2 * dx * dy / (r * r);
eX = s2 * e[0] + c2 * e[1];
eesum += eX * eX * w[0] * w[0];
eTsum[bin] -= (c2 * e[0] + s2 * e[1]) * w[0];
In [80]:
df.head(2)
Out[80]:
fN[0][0] fN[1][0] fN[2][0] fN[3][0] id[0][0] id[1][0] id[2][0] id[3][0] x[0] x[1] errx[0][0] errx[0][1] errx[1][0] errx[1][1] errx[2][0] errx[2][1] errx[3][0] errx[3][1] g[0][0] g[0][1] g[1][0] g[1][1] g[2][0] g[2][1] g[3][0] g[3][1] ellip[0][0] ellip[1][0] ellip[2][0] ellip[3][0] flux[0][0] flux[1][0] flux[2][0] flux[3][0] radius[0][0] radius[1][0] radius[2][0] radius[3][0] mag[0][0] mag[1][0] mag[2][0] mag[3][0] gm[0] gm[1] gc[0] gc[1] gmd[0] gmd[1] gcd[0] gcd[1] g_sq gmd_sq gm_sq gc_sq mag_mono mag_chro g_sq_bins above bins_mag_mono bins_mag_chro wt_mag_mono wt_mag_chro wt_mag
0 125 125 125 125 673 669 679 665 2812.7470 1504.8803 0.0243 0.0167 0.0160 0.0271 0.0243 0.0167 0.0161 0.0271 1.0389 0.3300 -1.0468 -0.2552 0.9883 0.3218 -1.0469 -0.2572 1.090052 1.077459 1.039371 1.078031 37423.424 37664.673 37345.853 37586.541 4.157839 4.217269 4.157549 4.216983 -11.432859 -11.439836 -11.430606 -11.437581 -0.00395 0.0374 -0.0293 0.03230 1.04285 0.2926 1.0176 0.28950 1.188213 1.173151 0.001414 0.001902 -11.436348 -11.434093 (1.136, 1.175] True (-11.623, -10.865] (-11.624, -10.867] 1.093726 1.091908 1.092817
1 125 125 125 125 3624 3617 3627 3615 1476.0647 2177.7419 0.0575 0.0381 0.0362 0.0575 0.0573 0.0381 0.0362 0.0580 1.0092 0.2622 -1.0400 -0.1400 0.9842 0.2581 -1.0684 -0.1422 1.042705 1.049381 1.017480 1.077822 16508.831 16472.935 16469.919 16437.573 4.225692 4.127967 4.224107 4.127593 -10.544291 -10.541927 -10.541729 -10.539594 -0.01540 0.0611 -0.0421 0.05795 1.02460 0.2011 1.0263 0.20015 1.087233 1.090246 0.003970 0.005131 -10.543109 -10.540662 (1.058, 1.097] True (-10.865, -10.108] (-10.867, -10.11] 0.914694 0.913215 0.913955
In [81]:
# constants
RMIN = 10
DLNR = 0.5

df['dx'] = df['x[0]'] - 1699 # jesisim output fitsfiles have shape 3398, 3398
df['dy'] = df['x[1]'] - 1699

df['r'] = np.hypot(df['dx'], df['dy'])
# df['r'] = np.sqrt(df['dx']**2 + df['dy']**2)

df['cos2theta'] = df.eval(' (dx * dx - dy * dy) / (r * r)' )
df['sin2theta'] = df.eval(' (2  * dx * dy     ) / (r * r)' )

df['bin'] = ( np.log(df.r / RMIN) / DLNR).astype(int)

df['bin'].value_counts()
Out[81]:
9     9378
10    7777
8     3645
7     1436
6      545
5      257
4       51
2        4
1        3
0        1
Name: bin, dtype: int64
In [82]:
df['eX_mono'] =       df['sin2theta'] * df['gm[0]'] + df['cos2theta'] * df['gm[1]']
df['eT_mono'] = -1 * (df['cos2theta'] * df['gm[0]'] + df['sin2theta'] * df['gm[1]'] ) 

df['eX_chro'] =       df['sin2theta'] * df['gc[0]'] + df['cos2theta'] * df['gc[1]']
df['eT_chro'] = -1 * (df['cos2theta'] * df['gc[0]'] + df['sin2theta'] * df['gc[1]']  )
In [83]:
df['eT_mono_times_wt'] = df['eT_mono'] * df['wt_mag']
df['eT_chro_times_wt'] = df['eT_chro'] * df['wt_mag']
In [84]:
df['eX_monosq_times_wt'] = df['eX_mono'] *  df['eX_mono'] * df['wt_mag']
df['eX_chrosq_times_wt'] = df['eX_chro'] *  df['eX_chro'] * df['wt_mag']

df['eXsq_times_wt'] = df.eval(" (eX_monosq_times_wt + eX_chrosq_times_wt) / 2 ")

df.iloc[:2,-6:]
Out[84]:
eT_chro eT_mono_times_wt eT_chro_times_wt eX_monosq_times_wt eX_chrosq_times_wt eXsq_times_wt
0 0.038500 0.017889 0.042073 0.001458 0.001776 0.001617
1 0.017258 0.033682 0.015773 0.000693 0.000024 0.000358

Error Analysis

https://www.phenix.bnl.gov/WWW/publish/elke/EIC/Files-for-Wiki/lara.02-008.errors.pdf

When the statistics involved in calculating $E_A$ and $E_B$ are not indendent, the error for a function $f(E_A, E_B)$ has the expression: $$ \sigma_{f}=\sqrt{\left(\frac{\partial f}{\partial E_{A}} \sigma_{A}\right)^{2}+\left(\frac{\partial f}{\partial E_{B}} \sigma_{B}\right)^{2}+2 \frac{\partial f}{\partial E_{A}} \frac{\partial f}{\partial E_{B}} \operatorname{cov}\left(E_{A}, E_{B}\right)} $$

where the last term takes care of the correlations between $E_A$ and $E_B$. Given a large number $N$ of measurements $E_{A_i}$, the standard deviation $\sigma_A$ is empirically defined as:

$$ \sigma_{A}^{2}=\frac{1}{N-1} \sum_{i=1}^{N}\left(E_{A_{i}}-E_{A}\right)^{2} $$

while the covariance between $E_A$ and $E_B$ is given by: $$ \operatorname{cov}\left(E_{A}, E_{B}\right)=\frac{1}{N-1} \sum_{i=1}^{N}\left(E_{A_{i}}-E_{A}\right)\left(E_{B_{i}}-E_{B}\right) $$

where $E_A$ and $E_B$ are the averages of $E_{A_i}$ and $E_{B_i}$. When $E_A$ and $E_B$ are independent, over a large number $N$ of measurements they will fluctuate around their average in an uncorrelated way, so that the covariance is zero and one recovers the usual formula for the propagation of errors in a function of independent variables.

In [85]:
"""
f = m/c
df/dm = 1/c
df/dc = -m/c2

s2 ==> sigma
s2 = r2 (sm2/m2 + sc2/c2 -2/m/c cov(m,c))

put m=c,
s2 = 2/m2 (sm2 - cov)

error = sigma/sqrt(n)

error = 
            0.000332
      sqrt  --------
             eT(bin)**2
    ---------------------
     sqrt(ngals_bin)
""";
In [86]:
cov = df[['eT_chro','eT_mono']].cov()
cov
Out[86]:
eT_chro eT_mono
eT_chro 0.017643 0.017823
eT_mono 0.017823 0.018590
In [87]:
diag_mean = np.diag(cov).mean()
diag_mean
Out[87]:
0.01811649316727943
In [88]:
offdiag = cov.iloc[0,1]
offdiag
Out[88]:
0.017822869300617133
In [89]:
diag_mean - offdiag
Out[89]:
0.0002936238666622973
In [90]:
myerr = (diag_mean - offdiag) * 2
myerr
Out[90]:
0.0005872477333245946
In [91]:
# temporary value to calculate error
# err_coeff = 0.000332

err_coeff = myerr

df['err_numerator'] = myerr  # np.sqrt(err_coeff/df['eT_mono']/df['eT_mono'])
df['eX_monosq_times_wt_std'] = df['eX_monosq_times_wt'].std()
df['eX_chrosq_times_wt_std'] = df['eX_chrosq_times_wt'].std()
df['eT_ratio'] = df['eT_mono'] / df['eT_chro']
df['eT_ratio_std'] = df['eT_ratio'].std()
df.iloc[:2,-5:]
Out[91]:
err_numerator eX_monosq_times_wt_std eX_chrosq_times_wt_std eT_ratio eT_ratio_std
0 0.000587 0.055519 0.054639 0.425195 16.501066
1 0.000587 0.055519 0.054639 2.135405 16.501066

Radial Binnings

In [92]:
# constants
RMIN = 10
DLNR = 0.5
In [93]:
# renaming aggegration needs pandas > 0.25
pd.__version__
Out[93]:
'1.0.3'
In [94]:
df.filter(regex='times|err_numerator').head().round(1)
Out[94]:
eT_mono_times_wt eT_chro_times_wt eX_monosq_times_wt eX_chrosq_times_wt eXsq_times_wt err_numerator eX_monosq_times_wt_std eX_chrosq_times_wt_std
0 0.0 0.0 0.0 0.0 0.0 0.0 0.1 0.1
1 0.0 0.0 0.0 0.0 0.0 0.0 0.1 0.1
9 0.1 0.1 0.0 0.0 0.0 0.0 0.1 0.1
10 -0.3 -0.4 0.2 0.2 0.2 0.0 0.1 0.1
11 0.1 0.1 0.0 0.0 0.0 0.0 0.1 0.1
In [95]:
df_radial_bins = df.groupby('bin').agg(
    r_mean                 = ('r', 'mean'),
    wt_mag_sum             = ('wt_mag', 'sum'),
    eT_mono_times_wt_sum   = ('eT_mono_times_wt', 'sum'),
    eT_chro_times_wt_sum   = ('eT_chro_times_wt', 'sum'),
    eX_monosq_times_wt_sum = ('eX_monosq_times_wt', 'sum'),
    eX_chrosq_times_wt_sum = ('eX_chrosq_times_wt', 'sum'),
    err_numerator_mean     = ('err_numerator', 'mean')
                                       )


# bin counts
df_radial_bins['bin_count'] = df['bin'].value_counts()

# columns after binning 
df_radial_bins['eT_mean_mono'] = (df_radial_bins['eT_mono_times_wt_sum'] / 
                                  df_radial_bins['wt_mag_sum'])

df_radial_bins['eT_mean_chro'] = (df_radial_bins['eT_chro_times_wt_sum'] /
                                  df_radial_bins['wt_mag_sum'])
               
df_radial_bins['eX_mean_mono'] = np.sqrt(df_radial_bins['eX_monosq_times_wt_sum'] /
                                         df_radial_bins['wt_mag_sum'])
                                                     
df_radial_bins['eX_mean_chro'] = np.sqrt(df_radial_bins['eX_chrosq_times_wt_sum'] /
                                         df_radial_bins['wt_mag_sum'])
               
df_radial_bins['eT_ratio']     = (df_radial_bins['eT_mean_mono'] /
                                  df_radial_bins['eT_mean_chro'])


# Errors
sqrt_bin = np.sqrt(df_radial_bins['bin_count'])
df_radial_bins['eT_mono_err'] = df_radial_bins['eX_mean_mono'] / sqrt_bin
df_radial_bins['eT_chro_err'] = df_radial_bins['eX_mean_chro'] / sqrt_bin
df_radial_bins['eT_ratio_err'] = ( np.sqrt(df_radial_bins['err_numerator_mean']/
                                  df_radial_bins['eT_mean_mono'] /
                                  df_radial_bins['eT_mean_chro']) /
                                   sqrt_bin)

print('Statistics for different radial bins')
print(f'RMIN = {RMIN} and DLNR = {DLNR}')

df_radial_bins.style\
.background_gradient(subset=['eT_ratio_err'],cmap='Blues')
Statistics for different radial bins
RMIN = 10 and DLNR = 0.5
Out[95]:
r_mean wt_mag_sum eT_mono_times_wt_sum eT_chro_times_wt_sum eX_monosq_times_wt_sum eX_chrosq_times_wt_sum err_numerator_mean bin_count eT_mean_mono eT_mean_chro eX_mean_mono eX_mean_chro eT_ratio eT_mono_err eT_chro_err eT_ratio_err
bin
0 15.922736 0.821853 0.154154 0.233722 0.382717 0.654706 0.000587 1 0.187569 0.284384 0.682404 0.892537 0.659560 0.682404 0.892537 0.104925
1 20.798501 2.187788 0.062590 0.029083 0.186843 0.181571 0.000587 3 0.028609 0.013293 0.292238 0.288085 2.152115 0.168724 0.166326 0.717437
2 37.105380 3.507703 0.566152 0.660272 0.408234 0.417738 0.000587 4 0.161403 0.188235 0.341148 0.345096 0.857453 0.170574 0.172548 0.069514
4 103.776339 48.656491 22.751395 22.173549 7.531033 7.075781 0.000587 51 0.467592 0.455716 0.393420 0.381344 1.026060 0.055090 0.053399 0.007351
5 164.304417 257.235018 84.027721 82.896269 19.043627 18.771314 0.000587 257 0.326657 0.322259 0.272088 0.270136 1.013649 0.016972 0.016851 0.004659
6 268.200572 551.431274 102.152082 99.458490 18.205595 17.478648 0.000587 545 0.185249 0.180364 0.181701 0.178036 1.027083 0.007783 0.007626 0.005679
7 444.108025 1443.865730 161.972776 158.982472 29.348119 27.921324 0.000587 1436 0.112180 0.110109 0.142570 0.139061 1.018809 0.003762 0.003670 0.005754
8 735.987041 3612.853499 216.039025 211.361175 60.047668 56.869301 0.000587 3645 0.059797 0.058503 0.128921 0.125462 1.022132 0.002135 0.002078 0.006786
9 1210.338622 9369.695779 315.056644 310.666953 137.358279 132.273338 0.000587 9378 0.033625 0.033157 0.121078 0.118816 1.014130 0.001250 0.001227 0.007494
10 1733.996712 7806.744866 158.556109 157.719344 116.236744 109.799053 0.000587 7777 0.020310 0.020203 0.122022 0.118594 1.005305 0.001384 0.001345 0.013566
In [96]:
# why some eT values are -ve?
"""
1. For given rmin and dlnr we have some bins very few object counts.

""";
In [97]:
pd.cut(df['eT_mono'],20).value_counts().to_frame().style.background_gradient()
Out[97]:
eT_mono
(-0.00109, 0.0873] 13818
(0.0873, 0.176] 2971
(-0.0895, -0.00109] 2787
(0.176, 0.264] 920
(-0.178, -0.0895] 654
(0.264, 0.352] 454
(-0.266, -0.178] 291
(0.352, 0.441] 245
(-0.355, -0.266] 174
(0.441, 0.529] 172
(-0.443, -0.355] 125
(0.529, 0.618] 123
(-0.531, -0.443] 91
(-0.62, -0.531] 85
(0.618, 0.706] 74
(-0.708, -0.62] 51
(-0.797, -0.708] 26
(0.706, 0.794] 26
(0.794, 0.883] 5
(-0.887, -0.797] 5
In [98]:
fig,ax = plt.subplots(figsize=(12,8))
sns.distplot(df['eT_mono'],ax=ax)
plt.title('Histogram and density plot of eT_mono');
In [99]:
df['wt_mag'].hist(bins=100,figsize=(12,8));
plt.title('Histogram of wt_mag')
plt.xlabel('wt_mag')
plt.ylabel('Frequency');
plt.savefig('images/weights.png',dpi=300)
In [100]:
df.query(""" r>500 """)['eT_mono'].describe().to_frame()
Out[100]:
eT_mono
count 21146.000000
mean 0.034114
std 0.127468
min -0.884955
25% 0.007847
50% 0.034563
75% 0.065415
max 0.882777
In [101]:
df['gm_sq'].hist(bins=100,figsize=(12,8))
plt.title('Histogram of gm_sq')
plt.ylabel('Frequency')
plt.xlabel('gm_sq')
plt.savefig('images/gm_sq_hist_after_cleaning.png',dpi=300)
In [102]:
df.head(2)
Out[102]:
fN[0][0] fN[1][0] fN[2][0] fN[3][0] id[0][0] id[1][0] id[2][0] id[3][0] x[0] x[1] errx[0][0] errx[0][1] errx[1][0] errx[1][1] errx[2][0] errx[2][1] errx[3][0] errx[3][1] g[0][0] g[0][1] g[1][0] g[1][1] g[2][0] g[2][1] g[3][0] g[3][1] ellip[0][0] ellip[1][0] ellip[2][0] ellip[3][0] flux[0][0] flux[1][0] flux[2][0] flux[3][0] radius[0][0] radius[1][0] radius[2][0] radius[3][0] mag[0][0] mag[1][0] mag[2][0] mag[3][0] gm[0] gm[1] gc[0] gc[1] gmd[0] gmd[1] gcd[0] gcd[1] g_sq gmd_sq gm_sq gc_sq mag_mono mag_chro g_sq_bins above bins_mag_mono bins_mag_chro wt_mag_mono wt_mag_chro wt_mag dx dy r cos2theta sin2theta bin eX_mono eT_mono eX_chro eT_chro eT_mono_times_wt eT_chro_times_wt eX_monosq_times_wt eX_chrosq_times_wt eXsq_times_wt err_numerator eX_monosq_times_wt_std eX_chrosq_times_wt_std eT_ratio eT_ratio_std
0 125 125 125 125 673 669 679 665 2812.7470 1504.8803 0.0243 0.0167 0.0160 0.0271 0.0243 0.0167 0.0161 0.0271 1.0389 0.3300 -1.0468 -0.2552 0.9883 0.3218 -1.0469 -0.2572 1.090052 1.077459 1.039371 1.078031 37423.424 37664.673 37345.853 37586.541 4.157839 4.217269 4.157549 4.216983 -11.432859 -11.439836 -11.430606 -11.437581 -0.00395 0.0374 -0.0293 0.03230 1.04285 0.2926 1.0176 0.28950 1.188213 1.173151 0.001414 0.001902 -11.436348 -11.434093 (1.136, 1.175] True (-11.623, -10.865] (-11.624, -10.867] 1.093726 1.091908 1.092817 1113.7470 -194.1197 1130.537411 0.941034 -0.338311 9 0.036531 0.016370 0.040308 0.038500 0.017889 0.042073 0.001458 0.001776 0.001617 0.000587 0.055519 0.054639 0.425195 16.501066
1 125 125 125 125 3624 3617 3627 3615 1476.0647 2177.7419 0.0575 0.0381 0.0362 0.0575 0.0573 0.0381 0.0362 0.0580 1.0092 0.2622 -1.0400 -0.1400 0.9842 0.2581 -1.0684 -0.1422 1.042705 1.049381 1.017480 1.077822 16508.831 16472.935 16469.919 16437.573 4.225692 4.127967 4.224107 4.127593 -10.544291 -10.541927 -10.541729 -10.539594 -0.01540 0.0611 -0.0421 0.05795 1.02460 0.2011 1.0263 0.20015 1.087233 1.090246 0.003970 0.005131 -10.543109 -10.540662 (1.058, 1.097] True (-10.865, -10.108] (-10.867, -10.11] 0.914694 0.913215 0.913955 -222.9353 478.7419 528.104114 -0.643591 -0.765370 7 -0.027537 0.036853 -0.005074 0.017258 0.033682 0.015773 0.000693 0.000024 0.000358 0.000587 0.055519 0.054639 2.135405 16.501066

Plot of eT for mono and chro

In [120]:
def plot_eT_mean(rmin, dlnr,min_bin_count,
                 show_ratio=True,
                 show_mono_chro=False,
                 show_data=True):
    # title
    title = f'rmin={rmin}, dlnr={dlnr}, min_bin_count={min_bin_count}'
    
    # radial bin
    df['bin'] = ( np.log(df.r / rmin) / dlnr).astype(int)

    # aggregations per given bin
    df_radial_bins = df.groupby('bin').agg(
        r_mean                 = ('r', 'mean'),
        wt_mag_sum             = ('wt_mag', 'sum'),
        eT_mono_times_wt_sum   = ('eT_mono_times_wt', 'sum'),
        eT_chro_times_wt_sum   = ('eT_chro_times_wt', 'sum'),
        eX_monosq_times_wt_sum = ('eX_monosq_times_wt', 'sum'),
        eX_chrosq_times_wt_sum = ('eX_chrosq_times_wt', 'sum'),
        err_numerator_mean     = ('err_numerator', 'mean')
                                           )
    
    # bin counts
    df_radial_bins['bin_count'] = df['bin'].value_counts()
    df_radial_bins = df_radial_bins.query(""" bin_count > @min_bin_count """) 
    df_radial_bins = df_radial_bins.iloc[:-1,:]

    # columns after binning 
    df_radial_bins['eT_mean_mono'] = (df_radial_bins['eT_mono_times_wt_sum'] / 
                                      df_radial_bins['wt_mag_sum'])

    df_radial_bins['eT_mean_chro'] = (df_radial_bins['eT_chro_times_wt_sum'] /
                                      df_radial_bins['wt_mag_sum'])

    df_radial_bins['eX_mean_mono'] = np.sqrt(
        df_radial_bins['eX_monosq_times_wt_sum'] /
        df_radial_bins['wt_mag_sum']
    )

    df_radial_bins['eX_mean_chro'] = np.sqrt(
        df_radial_bins['eX_chrosq_times_wt_sum'] /
        df_radial_bins['wt_mag_sum']
    )

    df_radial_bins['eT_ratio']     = (df_radial_bins['eT_mean_mono'] /
                                      df_radial_bins['eT_mean_chro'])

    
    # Errors
    sqrt_bin = np.sqrt(df_radial_bins['bin_count'])
    df_radial_bins['eT_mono_err'] = df_radial_bins['eX_mean_mono'] / sqrt_bin
    df_radial_bins['eT_chro_err'] = df_radial_bins['eX_mean_chro'] / sqrt_bin
    df_radial_bins['eT_ratio_err'] = ( np.sqrt(df_radial_bins['err_numerator_mean']/
                                  df_radial_bins['eT_mean_mono'] /
                                  df_radial_bins['eT_mean_chro']) /
                                   sqrt_bin)
 
    # also plot eT mono and eT chro
    if show_ratio:
        fig, ax = plt.subplots(1,1,figsize=(12,8))
        df_radial_bins.plot.line(x='r_mean',y='eT_ratio',
                                 yerr='eT_ratio_err',
                                 marker='o',color='b',ax=ax)
        ax.set_xlabel('Radius',fontsize=18)
        ax.set_ylabel(r'$\frac{eT_{mono}}{eT_{chro}}$',fontsize=18)
        plt.ylim(0.95, 1.0)
        
    if show_mono_chro:
        fig, ax = plt.subplots(3,1,figsize=(14,12))
        
        # top
        df_radial_bins.plot.line(x='r_mean',y='eT_mean_mono',
                                 yerr='eT_mono_err',
                                 marker='o',color='r', ax=ax[0])
        
        # top
        df_radial_bins.plot.line(x='r_mean',y='eT_mean_chro',
                                 yerr='eT_chro_err',
                                 marker='o',color='b',ax=ax[0])
        # middle
        df_radial_bins.plot.line(x='r_mean',y='eT_mean_mono',
                                 yerr='eT_mono_err',
                                 marker='o',color='r', ax=ax[1])
        
        # bottom
        df_radial_bins.plot.line(x='r_mean',y='eT_mean_chro',
                                 yerr='eT_chro_err',
                                 marker='o',color='b',ax=ax[2])
        
        # label
        ax[0].set_xlabel('')
        ax[1].set_xlabel('')
        ax[2].set_xlabel('Radius',fontsize=18)
        ax[0].set_ylabel('eT',fontsize=18)
        ax[1].set_ylabel('eT_mean_mono',fontsize=18)
        ax[2].set_ylabel('eT_mean_chro',fontsize=18)


    plt.suptitle(f'Plot of eT for {title}',fontsize=24,weight='bold')
    
    # also show the dataframe
    if show_data:
        display(df_radial_bins.style.background_gradient(subset=['eT_mean_mono']))
    
    

    plt.ylim(0,2.0)
    plt.savefig('images/eT_versus_radius.png',dpi=300)
    plt.tight_layout()
    plt.show()

# plot now
plot_eT_mean(rmin=300, dlnr=0.5,min_bin_count=10,
            show_ratio=True,show_mono_chro=False)
r_mean wt_mag_sum eT_mono_times_wt_sum eT_chro_times_wt_sum eX_monosq_times_wt_sum eX_chrosq_times_wt_sum err_numerator_mean bin_count eT_mean_mono eT_mean_chro eX_mean_mono eX_mean_chro eT_ratio eT_mono_err eT_chro_err eT_ratio_err
bin
-2 97.048385 31.101104 13.486853 13.065372 4.823565 4.446951 0.000587 33 0.433645 0.420093 0.393819 0.378132 1.032259 0.068555 0.065824 0.009884
-1 150.652687 200.628951 74.882784 74.090191 17.787456 17.727269 0.000587 204 0.373240 0.369290 0.297756 0.297252 1.010698 0.020847 0.020812 0.004570
0 358.032309 1676.933131 245.470683 239.614595 43.917687 41.394924 0.000587 1666 0.146381 0.142889 0.161831 0.157114 1.024440 0.003965 0.003849 0.004105
1 668.376358 3012.449472 199.069787 195.618177 53.845246 51.572057 0.000587 3033 0.066082 0.064937 0.133695 0.130842 1.017645 0.002428 0.002376 0.006717
2 1096.742691 7757.245684 306.984092 300.179360 116.155354 112.428149 0.000587 7780 0.039574 0.038697 0.122367 0.120388 1.022669 0.001387 0.001365 0.007021
3 1648.160056 10307.457474 220.627384 220.233253 148.755978 140.550939 0.000587 10269 0.021405 0.021366 0.120133 0.116773 1.001790 0.001185 0.001152 0.011182
In [118]:
plot_eT_mean(rmin=300, dlnr=0.5,min_bin_count=10,
            show_ratio=False,show_mono_chro=True)
r_mean wt_mag_sum eT_mono_times_wt_sum eT_chro_times_wt_sum eX_monosq_times_wt_sum eX_chrosq_times_wt_sum err_numerator_mean bin_count eT_mean_mono eT_mean_chro eX_mean_mono eX_mean_chro eT_ratio eT_mono_err eT_chro_err eT_ratio_err
bin
-2 97.048385 31.101104 13.486853 13.065372 4.823565 4.446951 0.000587 33 0.433645 0.420093 0.393819 0.378132 1.032259 0.068555 0.065824 0.009884
-1 150.652687 200.628951 74.882784 74.090191 17.787456 17.727269 0.000587 204 0.373240 0.369290 0.297756 0.297252 1.010698 0.020847 0.020812 0.004570
0 358.032309 1676.933131 245.470683 239.614595 43.917687 41.394924 0.000587 1666 0.146381 0.142889 0.161831 0.157114 1.024440 0.003965 0.003849 0.004105
1 668.376358 3012.449472 199.069787 195.618177 53.845246 51.572057 0.000587 3033 0.066082 0.064937 0.133695 0.130842 1.017645 0.002428 0.002376 0.006717
2 1096.742691 7757.245684 306.984092 300.179360 116.155354 112.428149 0.000587 7780 0.039574 0.038697 0.122367 0.120388 1.022669 0.001387 0.001365 0.007021
3 1648.160056 10307.457474 220.627384 220.233253 148.755978 140.550939 0.000587 10269 0.021405 0.021366 0.120133 0.116773 1.001790 0.001185 0.001152 0.011182

Interactive plots

In [105]:
import plotly.graph_objs as go
import plotly.figure_factory as ff
from plotly import tools
from plotly.offline import plot, iplot, init_notebook_mode
init_notebook_mode(connected=False)
In [106]:
df.filter(regex='wt').head(2)
Out[106]:
wt_mag_mono wt_mag_chro wt_mag eT_mono_times_wt eT_chro_times_wt eX_monosq_times_wt eX_chrosq_times_wt eXsq_times_wt eX_monosq_times_wt_std eX_chrosq_times_wt_std
0 1.093726 1.091908 1.092817 0.017889 0.042073 0.001458 0.001776 0.001617 0.055519 0.054639
1 0.914694 0.913215 0.913955 0.033682 0.015773 0.000693 0.000024 0.000358 0.055519 0.054639
In [107]:
# I need pandas > 0.25 for multiple agg
pd.__version__
Out[107]:
'1.0.3'
In [108]:
def plotly_eT_plot(rmin=400, dlnr=0.4, min_bin_count=20,
                   show_mono_chro=False,
                   show_mono=False,
                   show_chro=False,
                   show_data=True,
                  ):
    
    # title
    title=f'rmin={rmin}, dlnr={dlnr}, min_bin_count={min_bin_count}'
    
    # radial bin
    df['bin'] = ( np.log(df.r / rmin) / dlnr).astype(int)

    # aggregations per given bin
    df_radial_bins = df.groupby('bin').agg(
        r_mean                 = ('r', 'mean'),
        wt_mag_sum             = ('wt_mag', 'sum'),
        eT_mono_times_wt_sum   = ('eT_mono_times_wt', 'sum'),
        eT_chro_times_wt_sum   = ('eT_chro_times_wt', 'sum'),
        eX_monosq_times_wt_sum = ('eX_monosq_times_wt', 'sum'),
        eX_chrosq_times_wt_sum = ('eX_chrosq_times_wt', 'sum'),
        err_numerator_mean     = ('err_numerator', 'mean')
                                           )
    

    # bin counts
    df_radial_bins['bin_count'] = df['bin'].value_counts()
    df_radial_bins = df_radial_bins.query(""" bin_count > @min_bin_count """) 
    df_radial_bins = df_radial_bins.iloc[:-1,:]
        
    # columns after binning 
    df_radial_bins['eT_mean_mono'] = (df_radial_bins['eT_mono_times_wt_sum'] / 
                                      df_radial_bins['wt_mag_sum'])

    df_radial_bins['eT_mean_chro'] = (df_radial_bins['eT_chro_times_wt_sum'] /
                                      df_radial_bins['wt_mag_sum'])

    df_radial_bins['eX_mean_mono'] = np.sqrt(
        df_radial_bins['eX_monosq_times_wt_sum'] /
        df_radial_bins['wt_mag_sum']
    )

    df_radial_bins['eX_mean_chro'] = np.sqrt(
        df_radial_bins['eX_chrosq_times_wt_sum'] /
        df_radial_bins['wt_mag_sum']
    )

    df_radial_bins['eT_ratio']     = (df_radial_bins['eT_mean_mono'] /
                                      df_radial_bins['eT_mean_chro'])

    
    # Errors
    sqrt_bin = np.sqrt(df_radial_bins['bin_count'])
    df_radial_bins['eT_mono_err'] = df_radial_bins['eX_mean_mono'] / sqrt_bin
    df_radial_bins['eT_chro_err'] = df_radial_bins['eX_mean_chro'] / sqrt_bin
    df_radial_bins['eT_ratio_err'] = ( np.sqrt(df_radial_bins['err_numerator_mean']/
                                  df_radial_bins['eT_mean_mono'] /
                                  df_radial_bins['eT_mean_chro']) /
                                   sqrt_bin)
    
    # remove first bin of strong lensing
    df_radial_bins = df_radial_bins.iloc[1:,:]
    
    # eT mono vs chro ratio
    sc = go.Scatter( x=df_radial_bins['r_mean'],
                     y=df_radial_bins['eT_ratio'],
                     mode = 'lines+markers',
                     name = 'eT ratio',
                     error_y = dict(
                         type='data',
                         array=df_radial_bins['eT_ratio_err'],
                         visible=True,
                         )
                   )
    
    mydata = [sc]
    
    # layout
    layout = go.Layout(
                    title={
                        'text': title,
                        'y':0.9,
                        'x':0.5,
                        'xanchor': 'center',
                        'yanchor': 'top'},
                    xaxis=dict(
                               title='radius',
                               titlefont=dict(
                               family='Courier New, monospace',
                               size=18,
                               color='#7f7f7f')),
                     yaxis=dict(title='eT',
                                titlefont=dict(
                                          family='Courier New, monospace',
                                          size=18,
                                          color='#7f7f7f')))
    
    myfig = go.Figure(data=mydata, layout=layout)

    
    # also plot eT mono and chro
    # monochromatic
    sc1 = go.Scatter(x=df_radial_bins['r_mean'],
                     y=df_radial_bins['eT_mean_mono'],
                     mode = 'lines+markers',
                     name = 'eT mono',
                     error_y = dict(
                         type='data',
                         array=df_radial_bins['eT_mono_err'],
                         visible=True,
                         )
                    )
        # chromatic
    sc2 = go.Scatter(x=df_radial_bins['r_mean'],
                     y=df_radial_bins['eT_mean_chro'],
                     mode = 'lines+markers',
                     name = 'eT chro',
                     error_y = dict(
                         type='data',
                         array=df_radial_bins['eT_chro_err'],
                         visible=True,
                         )
                    )
    if show_mono_chro:
        mydata= [sc1,sc2]
        myfig = go.Figure(data=mydata, layout=layout)
        
    if show_mono:
        mydata = [sc1]
        myfig = go.Figure(data=mydata, layout=layout)
        myfig.update_layout(
                    xaxis_title="Radius",
                    yaxis_title="eT_mono",
                )
        myfig.update_layout(
                    title={
                        'text': "eT_mono vs radius plot",
                        'y':0.9,
                        'x':0.5,
                        'xanchor': 'center',
                        'yanchor': 'top'}
        )
        
    if show_chro:
        mydata = [sc2]
        myfig = go.Figure(data=mydata, layout=layout)
        myfig.update_layout(
                    xaxis_title="Radius",
                    yaxis_title="eT_chro",
                )
        myfig.update_layout(
                    title={
                        'text': "eT_chro vs radius plot",
                        'y':0.9,
                        'x':0.5,
                        'xanchor': 'center',
                        'yanchor': 'top'})
    
    # this is for all cases
    myfig.update_layout(showlegend=True)

    
    # also show the dataframe
    if show_data:
        display(df_radial_bins.style.background_gradient(subset=['eT_mean_mono']))

    # iplot plots in jupyter-notebook, plot opens in new tab.
    return iplot(myfig, filename='myplot.html')

# plot
plotly_eT_plot(rmin=400, dlnr=0.4, min_bin_count=20,
                   show_mono_chro=False,
                   show_mono=False,
                   show_chro=False,
                   show_data=True,
                  )
r_mean wt_mag_sum eT_mono_times_wt_sum eT_chro_times_wt_sum eX_monosq_times_wt_sum eX_chrosq_times_wt_sum err_numerator_mean bin_count eT_mean_mono eT_mean_chro eX_mean_mono eX_mean_chro eT_ratio eT_mono_err eT_chro_err eT_ratio_err
bin
-2 152.742316 179.285406 64.249036 63.638269 14.878388 14.888185 0.000587 182 0.358362 0.354955 0.288075 0.288170 1.009597 0.021354 0.021361 0.005036
-1 225.506055 352.749423 77.155052 75.297039 13.509578 12.861971 0.000587 343 0.218725 0.213458 0.195699 0.190950 1.024676 0.010567 0.010310 0.006056
0 451.573352 2151.984136 240.306878 236.084853 47.031784 44.565504 0.000587 2154 0.111668 0.109706 0.147835 0.143906 1.017884 0.003185 0.003101 0.004717
1 752.737569 3060.641678 176.982232 172.372129 49.693588 47.274357 0.000587 3080 0.057825 0.056319 0.127422 0.124282 1.026745 0.002296 0.002239 0.007652
2 1118.311598 6593.198382 251.065291 245.131075 99.333497 96.451812 0.000587 6616 0.038079 0.037179 0.122744 0.120950 1.024208 0.001509 0.001487 0.007918
3 1600.747986 9740.625942 221.288795 221.524726 140.237835 132.226479 0.000587 9701 0.022718 0.022742 0.119988 0.116511 0.998935 0.001218 0.001183 0.010824
In [109]:
plotly_eT_plot(rmin=300, dlnr=0.5,min_bin_count=20,show_mono_chro=True)
r_mean wt_mag_sum eT_mono_times_wt_sum eT_chro_times_wt_sum eX_monosq_times_wt_sum eX_chrosq_times_wt_sum err_numerator_mean bin_count eT_mean_mono eT_mean_chro eX_mean_mono eX_mean_chro eT_ratio eT_mono_err eT_chro_err eT_ratio_err
bin
-1 150.652687 200.628951 74.882784 74.090191 17.787456 17.727269 0.000587 204 0.373240 0.369290 0.297756 0.297252 1.010698 0.020847 0.020812 0.004570
0 358.032309 1676.933131 245.470683 239.614595 43.917687 41.394924 0.000587 1666 0.146381 0.142889 0.161831 0.157114 1.024440 0.003965 0.003849 0.004105
1 668.376358 3012.449472 199.069787 195.618177 53.845246 51.572057 0.000587 3033 0.066082 0.064937 0.133695 0.130842 1.017645 0.002428 0.002376 0.006717
2 1096.742691 7757.245684 306.984092 300.179360 116.155354 112.428149 0.000587 7780 0.039574 0.038697 0.122367 0.120388 1.022669 0.001387 0.001365 0.007021
3 1648.160056 10307.457474 220.627384 220.233253 148.755978 140.550939 0.000587 10269 0.021405 0.021366 0.120133 0.116773 1.001790 0.001185 0.001152 0.011182
In [110]:
plotly_eT_plot(rmin=300, dlnr=0.5,min_bin_count=20,show_data=False,show_mono=True)
In [111]:
plotly_eT_plot(rmin=300, dlnr=0.5,min_bin_count=20,show_data=False,show_chro=True)

ipywidgets

In [112]:
import ipywidgets as widgets
from ipywidgets import HBox, VBox
from ipywidgets import interactive
In [113]:
rmin_ = widgets.IntSlider(min=0, max=500, step=50, value=300)
dlnr_  = widgets.FloatSlider(min=0.1,max=0.8,step=0.05,value=0.5)
min_bin_count_ = widgets.IntSlider(min=10, max=600, step=20, value=20)
interactive_plot = interactive(plotly_eT_plot,
                               rmin=rmin_,
                               dlnr=dlnr_,
                               min_bin_count=min_bin_count_)

output = interactive_plot.children[-1]
interactive_plot

Time Taken

In [114]:
time_taken = time.time() - time_start_notebook
h,m = divmod(time_taken,60*60)
print('Time taken to run whole notebook: {:.0f} hr '\
      '{:.0f} min {:.0f} secs'.format(h, *divmod(m,60)))
Time taken to run whole notebook: 0 hr 0 min 42 secs
In [ ]:
%%bash
python -m nbconvert *.ipynb
mkdir html
mv *.html html/
python get_nbviewer_links.py
In [ ]: